library(DiceKriging)
mat2list <- function(X){
  # Split the columns of a matrix into a list, one element per column.
  #
  # Args:
  #   X: a matrix (anything supporting ncol() and X[, i] indexing).
  # Returns:
  #   A list of length ncol(X); element i is column i of X.
  out <- vector(mode = 'list', length = ncol(X))
  # seq_len() rather than 1:ncol(X): for a zero-column matrix, 1:0 would
  # iterate over c(1, 0) and fail, whereas seq_len(0) iterates zero times.
  for(i in seq_len(ncol(X))){
    out[[i]] <- X[ , i]
  }
  out
}
# Can this go in common data? Would be needed for checking emulator fits
# Create fit lists for the combined data wave00 level 1a and wave01
#Y_const_level1a_wave01_scaled_list <- mat2list(Y_const_level1a_wave01_scaled)
#fit_list_const_level1a_wave01 <- mclapply(X = Y_const_level1a_wave01_scaled_list , FUN = km, formula = ~., design = X_level1a_wave01,
# mc.cores = 4, control = list(trace = FALSE))
reset <- function() {
  # Reset the graphics device to a single full-window panel so that
  # subsequent drawing calls can annotate on top of existing graphs.
  # Setting new = TRUE means the next plot does not clear the device.
  par(mfrow = c(1, 1), oma = rep(0, 4), mar = rep(0, 4), new = TRUE)
  # Empty, axis-free plot establishing a [0,1] x [0,1] coordinate system
  # for the annotations.
  plot(0:1, 0:1, type = "n", xlab = "", ylab = "", axes = FALSE)
}
makeTransparent <- function(someColor, alpha = 100) {
  # Convert colour name(s) or spec(s) into hex colours carrying the given
  # alpha value (0-255), for transparent plotting.
  #
  # Args:
  #   someColor: anything col2rgb() accepts (names, "#RRGGBB", integers);
  #              may be a vector, in which case a vector is returned.
  #   alpha:     opacity on the 0 (transparent) to 255 (opaque) scale.
  # Returns:
  #   Character vector of "#RRGGBBAA" colour strings, one per input colour.
  channels <- col2rgb(someColor)
  apply(channels, 2, function(cc) {
    rgb(red = cc[1], green = cc[2], blue = cc[3],
        alpha = alpha, maxColorValue = 255)
  })
}
load('data/ensemble-jules-historical-timeseries.RData')
count_na <- function(x){
  # Count the NA entries in x (works on vectors and matrices alike).
  length(which(is.na(x)))
}
# Sanity check: the wave00 NPP ensemble loaded above should have no NAs
count_na(npp_ens_wave00)
## [1] 0
# Repeat the NA check for every "*ens_wave00*" object in the workspace
# (all loaded from the .RData file above)
ens_charvec <- ls(pattern = "ens_wave00")
for(i in ens_charvec){
print(count_na(get(i)))
}
## [1] 0
## [1] 0
## [1] 0
## [1] 0
## [1] 0
## [1] 0
## [1] 0
## [1] 0
## [1] 0
## [1] 0
## [1] 0
## [1] 0
## [1] 0
# How about we look at post 1950? Maybe that is messing it up?
# Keep only the later timesteps of the ensemble timeseries
years_ix <- 101:164
# Split ensemble members into training and test sets
train_ix <- (1:399)[-288] # Number 288 has numerical problems for NPP
test_ix <- 400:499
# X is the design (input parameter) matrix — presumably loaded from the
# .RData file above; confirm against the load() call
X_train <- X[train_ix, ]
X_test <- X[test_ix, ]
Y_train <- npp_ens_wave00[train_ix, years_ix ]
Y_test <- npp_ens_wave00[test_ix, years_ix]
# Calendar years corresponding to the retained timesteps
years_trunc <- years[years_ix]
# Plot the training-set timeseries
matplot(years_trunc, t(Y_train), type = 'l', lty = 'solid', col = 'black', xlab = 'years', ylab = 'training output', ylim = c(0,180))
## Perform PCA
# Reconstruct the training data with the truncated number of PCs - this is the smallest error we can expect with an emulation.
# perform a PCA on the training output
pca <- prcomp(Y_train, center = TRUE, scale = TRUE)
plot(pca)
# How many principal components do we wish to keep?
npc <- 3
scores <- pca$x[ ,1:npc]
# project the truncated scores back up, to see how well they reproduce the
# original data
# (reverses the scaling and centering that prcomp applied)
anom <- pca$rotation[ ,1:npc] %*% t(scores)*pca$scale
tens <- t(anom + pca$center)
# Overlay the truncated-PC reconstruction (red) on the training data (black)
matplot(years_trunc, t(Y_train), type = 'l', lty = 'solid', col = 'black', xlab = 'years', ylab = 'training output', ylim = c(0,180))
matlines(years_trunc, t(tens), lty = 'solid', col = 'red', xlab = 'years', ylab = 'training output')
# Plot the reconstruction error
Y_train_err <- tens - Y_train
matplot(years_trunc, t(Y_train_err),
type = 'l',
lty = 'solid',
col = 'black',
xlab = 'years',
ylab = 'training output',
main = 'training reconstruction error - perfect scores',
ylim = c(-5,5)
)
# Fit a DiceKriging km() emulator to each retained PC score in turn,
# accumulating leave-one-out (training) and test-set predictions.
scores_em_mean_test <- NULL
scores_em_sd_test <- NULL
scores_em_mean_train <- NULL
for(i in 1:npc){
# Emulate the i-th PC score as a function of the inputs
y <- pca$x[,i]
fit <- km(~., design = X_train, response = y)
loo <- leaveOneOut.km(fit, type = 'UK', trend.reestim = TRUE)
pred_test <- predict(fit, newdata = X_test, type = 'UK')
scores_em_mean <- pred_test$mean
scores_em_sd <- pred_test$sd
scores_em_mean_test <- cbind(scores_em_mean_test, scores_em_mean)
scores_em_sd_test <- cbind(scores_em_sd_test, scores_em_sd)
scores_em_mean_train <- cbind(scores_em_mean_train, loo$mean)
}
##
## optimisation start
## ------------------
## * estimation method : MLE
## * optimisation method : BFGS
## * analytical gradient : used
## * trend model : ~alpha_io + a_wl_io + bio_hum_cn + b_wl_io + dcatch_dlai_io +
## dqcrit_io + dz0v_dh_io + f0_io + fd_io + g_area_io + g_root_io +
## g_wood_io + gs_nvg_io + hw_sw_io + kaps_roth + knl_io + lai_max_io +
## lai_min_io + lma_io + n_inorg_turnover + nmass_io + nr_io +
## retran_l_io + retran_r_io + r_grow_io + rootd_ft_io + sigl_io +
## sorp + tleaf_of_io + tlow_io + tupp_io + l_vg_soil
## * covariance model :
## - type : matern5_2
## - nugget : NO
## - parameters lower bounds : 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10
## - parameters upper bounds : 1.998344 2 1.994471 1.999927 2 2 2 1.996699 2 1.996789 2 2 2 2 1.999374 1.99598 1.998257 2 2 1.996693 1.998361 2 2 2 1.991674 2 2 1.986556 2 2 2 1.998477
## - best initial criterion value(s) : -1283.624
##
## N = 32, M = 5 machine precision = 2.22045e-16
## At X0, 0 variables are exactly at the bounds
## At iterate 0 f= 1283.6 |proj g|= 1.911
## At iterate 1 f = 1269 |proj g|= 1.8736
## At iterate 2 f = 1240.6 |proj g|= 1.7741
## At iterate 3 f = 1236.1 |proj g|= 1.9164
## At iterate 4 f = 1226.2 |proj g|= 1.8193
## ys=-2.199e+00 -gs= 9.530e+00, BFGS update SKIPPED
## At iterate 5 f = 1219.8 |proj g|= 1.5641
## ys=-5.732e-01 -gs= 6.016e+00, BFGS update SKIPPED
## At iterate 6 f = 1210 |proj g|= 1.6505
## ys=-1.732e+00 -gs= 8.869e+00, BFGS update SKIPPED
## At iterate 7 f = 1203.1 |proj g|= 1.7473
## ys=-2.964e-02 -gs= 6.788e+00, BFGS update SKIPPED
## At iterate 8 f = 1197.6 |proj g|= 1.8377
## At iterate 9 f = 1193.3 |proj g|= 1.8289
## At iterate 10 f = 1192.6 |proj g|= 1.8052
## At iterate 11 f = 1192.1 |proj g|= 1.8215
## At iterate 12 f = 1191.9 |proj g|= 1.7826
## At iterate 13 f = 1191.5 |proj g|= 1.7646
## At iterate 14 f = 1190.2 |proj g|= 1.8236
## At iterate 15 f = 1189.8 |proj g|= 1.7157
## At iterate 16 f = 1189.7 |proj g|= 1.6587
## At iterate 17 f = 1188.9 |proj g|= 1.8242
## At iterate 18 f = 1188.6 |proj g|= 1.8197
## At iterate 19 f = 1188.5 |proj g|= 1.5593
## At iterate 20 f = 1188.3 |proj g|= 1.5372
## At iterate 21 f = 1188.1 |proj g|= 1.5102
## At iterate 22 f = 1188 |proj g|= 1.5038
## At iterate 23 f = 1187.8 |proj g|= 1.8205
## At iterate 24 f = 1186.3 |proj g|= 1.8429
## At iterate 25 f = 1184.9 |proj g|= 1.8193
## At iterate 26 f = 1184 |proj g|= 1.7606
## At iterate 27 f = 1182.3 |proj g|= 1.7419
## At iterate 28 f = 1182 |proj g|= 1.7423
## At iterate 29 f = 1181.8 |proj g|= 1.815
## At iterate 30 f = 1181.7 |proj g|= 1.8134
## At iterate 31 f = 1181.6 |proj g|= 1.8126
## At iterate 32 f = 1181.4 |proj g|= 1.7473
## At iterate 33 f = 1181.2 |proj g|= 1.7455
## At iterate 34 f = 1181 |proj g|= 0.8349
## At iterate 35 f = 1180.8 |proj g|= 1.805
## At iterate 36 f = 1180.8 |proj g|= 0.97958
## At iterate 37 f = 1180.8 |proj g|= 0.94645
## At iterate 38 f = 1180.8 |proj g|= 0.90469
## At iterate 39 f = 1180.8 |proj g|= 0.79804
## At iterate 40 f = 1180.7 |proj g|= 0.89866
## At iterate 41 f = 1180.7 |proj g|= 1.7465
## At iterate 42 f = 1180.7 |proj g|= 0.2816
## At iterate 43 f = 1180.7 |proj g|= 0.27781
## At iterate 44 f = 1180.7 |proj g|= 0.26052
## At iterate 45 f = 1180.7 |proj g|= 0.26042
## At iterate 46 f = 1180.7 |proj g|= 0.25903
## At iterate 47 f = 1180.7 |proj g|= 0.4029
## At iterate 48 f = 1180.7 |proj g|= 0.33381
## At iterate 49 f = 1180.7 |proj g|= 0.44174
## At iterate 50 f = 1180.7 |proj g|= 1.1685
## At iterate 51 f = 1180.7 |proj g|= 0.56017
## At iterate 52 f = 1180.7 |proj g|= 0.37387
## At iterate 53 f = 1180.7 |proj g|= 0.19494
## At iterate 54 f = 1180.7 |proj g|= 0.12967
## At iterate 55 f = 1180.7 |proj g|= 0.10408
## At iterate 56 f = 1180.7 |proj g|= 0.11899
## At iterate 57 f = 1180.7 |proj g|= 0.18916
## At iterate 58 f = 1180.7 |proj g|= 0.11524
## At iterate 59 f = 1180.7 |proj g|= 0.10869
## At iterate 60 f = 1180.7 |proj g|= 0.09803
## At iterate 61 f = 1180.7 |proj g|= 0.01086
## At iterate 62 f = 1180.7 |proj g|= 0.047212
## At iterate 63 f = 1180.7 |proj g|= 0.051041
## At iterate 64 f = 1180.7 |proj g|= 0.1066
## At iterate 65 f = 1180.7 |proj g|= 0.061185
## At iterate 66 f = 1180.7 |proj g|= 0.10953
## At iterate 67 f = 1180.7 |proj g|= 0.053443
## At iterate 68 f = 1180.7 |proj g|= 0.0087198
## At iterate 69 f = 1180.7 |proj g|= 0.016103
## At iterate 70 f = 1180.7 |proj g|= 0.019865
##
## iterations 70
## function evaluations 87
## segments explored during Cauchy searches 89
## BFGS updates skipped 4
## active bounds at final generalized Cauchy point 26
## norm of the final projected gradient 0.0198651
## final function value 1180.69
##
## F = 1180.69
## final value 1180.686617
## converged
##
## optimisation start
## ------------------
## * estimation method : MLE
## * optimisation method : BFGS
## * analytical gradient : used
## * trend model : ~alpha_io + a_wl_io + bio_hum_cn + b_wl_io + dcatch_dlai_io +
## dqcrit_io + dz0v_dh_io + f0_io + fd_io + g_area_io + g_root_io +
## g_wood_io + gs_nvg_io + hw_sw_io + kaps_roth + knl_io + lai_max_io +
## lai_min_io + lma_io + n_inorg_turnover + nmass_io + nr_io +
## retran_l_io + retran_r_io + r_grow_io + rootd_ft_io + sigl_io +
## sorp + tleaf_of_io + tlow_io + tupp_io + l_vg_soil
## * covariance model :
## - type : matern5_2
## - nugget : NO
## - parameters lower bounds : 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10
## - parameters upper bounds : 1.998344 2 1.994471 1.999927 2 2 2 1.996699 2 1.996789 2 2 2 2 1.999374 1.99598 1.998257 2 2 1.996693 1.998361 2 2 2 1.991674 2 2 1.986556 2 2 2 1.998477
## - best initial criterion value(s) : 167.712
##
## N = 32, M = 5 machine precision = 2.22045e-16
## At X0, 0 variables are exactly at the bounds
## At iterate 0 f= -167.71 |proj g|= 1.6558
## At iterate 1 f = -170.3 |proj g|= 1.9572
## At iterate 2 f = -170.72 |proj g|= 1.9361
## At iterate 3 f = -170.84 |proj g|= 1.6593
## At iterate 4 f = -171.18 |proj g|= 1.6306
## At iterate 5 f = -171.65 |proj g|= 1.8433
## At iterate 6 f = -172.06 |proj g|= 1.7997
## At iterate 7 f = -172.62 |proj g|= 1.9289
## At iterate 8 f = -172.83 |proj g|= 1.2723
## At iterate 9 f = -173.77 |proj g|= 1.9021
## At iterate 10 f = -174.43 |proj g|= 1.871
## At iterate 11 f = -174.92 |proj g|= 1.8544
## ys=-4.728e-02 -gs= 4.708e-01, BFGS update SKIPPED
## At iterate 12 f = -177.77 |proj g|= 1.7932
## At iterate 13 f = -181.31 |proj g|= 1.6854
## At iterate 14 f = -187.68 |proj g|= 1.5381
## At iterate 15 f = -192.96 |proj g|= 1.6002
## At iterate 16 f = -194.38 |proj g|= 1.4773
## At iterate 17 f = -195.63 |proj g|= 1.6936
## At iterate 18 f = -196.47 |proj g|= 1.7131
## At iterate 19 f = -196.95 |proj g|= 1.6404
## At iterate 20 f = -197.67 |proj g|= 1.6986
## At iterate 21 f = -198.48 |proj g|= 1.5499
## At iterate 22 f = -198.8 |proj g|= 1.4856
## At iterate 23 f = -199.91 |proj g|= 1.7139
## At iterate 24 f = -201.15 |proj g|= 1.1927
## At iterate 25 f = -201.58 |proj g|= 1.0805
## At iterate 26 f = -201.92 |proj g|= 1.7084
## At iterate 27 f = -202.2 |proj g|= 1.6869
## At iterate 28 f = -202.34 |proj g|= 1.136
## At iterate 29 f = -202.47 |proj g|= 0.84481
## At iterate 30 f = -202.65 |proj g|= 0.99603
## At iterate 31 f = -202.78 |proj g|= 1.6903
## At iterate 32 f = -202.88 |proj g|= 1.6884
## At iterate 33 f = -202.96 |proj g|= 0.60062
## At iterate 34 f = -202.98 |proj g|= 0.40819
## At iterate 35 f = -203.06 |proj g|= 0.83351
## At iterate 36 f = -203.14 |proj g|= 0.93846
## At iterate 37 f = -203.17 |proj g|= 0.83267
## At iterate 38 f = -203.23 |proj g|= 0.79981
## At iterate 39 f = -203.25 |proj g|= 0.65264
## At iterate 40 f = -203.32 |proj g|= 0.4391
## At iterate 41 f = -203.34 |proj g|= 0.45939
## At iterate 42 f = -203.4 |proj g|= 0.54628
## At iterate 43 f = -203.43 |proj g|= 0.92779
## At iterate 44 f = -203.46 |proj g|= 0.65543
## At iterate 45 f = -203.49 |proj g|= 0.54152
## At iterate 46 f = -203.5 |proj g|= 0.1871
## At iterate 47 f = -203.5 |proj g|= 0.19936
## At iterate 48 f = -203.5 |proj g|= 0.3244
## At iterate 49 f = -203.51 |proj g|= 0.32018
## At iterate 50 f = -203.52 |proj g|= 0.33853
## At iterate 51 f = -203.52 |proj g|= 0.16434
## At iterate 52 f = -203.52 |proj g|= 0.28014
## At iterate 53 f = -203.52 |proj g|= 0.32037
## At iterate 54 f = -203.53 |proj g|= 0.55584
## At iterate 55 f = -203.53 |proj g|= 0.25956
## At iterate 56 f = -203.53 |proj g|= 0.14111
## At iterate 57 f = -203.53 |proj g|= 0.11963
## At iterate 58 f = -203.53 |proj g|= 0.15618
## At iterate 59 f = -203.54 |proj g|= 0.24496
## At iterate 60 f = -203.54 |proj g|= 0.11887
## At iterate 61 f = -203.54 |proj g|= 0.072762
## At iterate 62 f = -203.54 |proj g|= 0.029363
## At iterate 63 f = -203.54 |proj g|= 0.047519
## At iterate 64 f = -203.54 |proj g|= 0.039717
## At iterate 65 f = -203.54 |proj g|= 0.023013
## At iterate 66 f = -203.54 |proj g|= 0.022067
## At iterate 67 f = -203.54 |proj g|= 0.019308
## At iterate 68 f = -203.54 |proj g|= 0.014468
## At iterate 69 f = -203.54 |proj g|= 0.030188
## At iterate 70 f = -203.54 |proj g|= 0.015508
## At iterate 71 f = -203.54 |proj g|= 0.0046804
## At iterate 72 f = -203.54 |proj g|= 0.0041961
## At iterate 73 f = -203.54 |proj g|= 0.0052956
## At iterate 74 f = -203.54 |proj g|= 0.0084188
## At iterate 75 f = -203.54 |proj g|= 0.006971
## At iterate 76 f = -203.54 |proj g|= 0.0019931
## At iterate 77 f = -203.54 |proj g|= 0.0018218
##
## iterations 77
## function evaluations 84
## segments explored during Cauchy searches 88
## BFGS updates skipped 1
## active bounds at final generalized Cauchy point 19
## norm of the final projected gradient 0.00182179
## final function value -203.537
##
## F = -203.537
## final value -203.537366
## converged
##
## optimisation start
## ------------------
## * estimation method : MLE
## * optimisation method : BFGS
## * analytical gradient : used
## * trend model : ~alpha_io + a_wl_io + bio_hum_cn + b_wl_io + dcatch_dlai_io +
## dqcrit_io + dz0v_dh_io + f0_io + fd_io + g_area_io + g_root_io +
## g_wood_io + gs_nvg_io + hw_sw_io + kaps_roth + knl_io + lai_max_io +
## lai_min_io + lma_io + n_inorg_turnover + nmass_io + nr_io +
## retran_l_io + retran_r_io + r_grow_io + rootd_ft_io + sigl_io +
## sorp + tleaf_of_io + tlow_io + tupp_io + l_vg_soil
## * covariance model :
## - type : matern5_2
## - nugget : NO
## - parameters lower bounds : 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10
## - parameters upper bounds : 1.998344 2 1.994471 1.999927 2 2 2 1.996699 2 1.996789 2 2 2 2 1.999374 1.99598 1.998257 2 2 1.996693 1.998361 2 2 2 1.991674 2 2 1.986556 2 2 2 1.998477
## - best initial criterion value(s) : 681.8778
##
## N = 32, M = 5 machine precision = 2.22045e-16
## At X0, 0 variables are exactly at the bounds
## At iterate 0 f= -681.88 |proj g|= 1.8862
## At iterate 1 f = -685.39 |proj g|= 1.6969
## At iterate 2 f = -690.58 |proj g|= 1.7887
## At iterate 3 f = -691.69 |proj g|= 1.8782
## At iterate 4 f = -695.57 |proj g|= 1.474
## At iterate 5 f = -696.51 |proj g|= 1.4301
## At iterate 6 f = -699.81 |proj g|= 1.6632
## At iterate 7 f = -703.66 |proj g|= 1.8304
## At iterate 8 f = -705.51 |proj g|= 1.5102
## At iterate 9 f = -706.54 |proj g|= 1.5512
## At iterate 10 f = -708.23 |proj g|= 1.8447
## At iterate 11 f = -708.9 |proj g|= 1.6452
## At iterate 12 f = -709.09 |proj g|= 1.637
## At iterate 13 f = -709.32 |proj g|= 1.6327
## At iterate 14 f = -709.99 |proj g|= 1.822
## At iterate 15 f = -710.27 |proj g|= 1.0794
## At iterate 16 f = -710.64 |proj g|= 1.1016
## At iterate 17 f = -710.82 |proj g|= 1.3344
## At iterate 18 f = -711.56 |proj g|= 1.6335
## At iterate 19 f = -711.68 |proj g|= 1.4616
## At iterate 20 f = -711.81 |proj g|= 1.083
## At iterate 21 f = -712.22 |proj g|= 1.0842
## At iterate 22 f = -712.58 |proj g|= 1.1808
## At iterate 23 f = -712.68 |proj g|= 0.99465
## At iterate 24 f = -712.73 |proj g|= 0.74038
## At iterate 25 f = -712.78 |proj g|= 1.8315
## At iterate 26 f = -712.81 |proj g|= 1.8336
## At iterate 27 f = -712.94 |proj g|= 1.8352
## At iterate 28 f = -713.02 |proj g|= 1.8172
## At iterate 29 f = -713.05 |proj g|= 0.66021
## At iterate 30 f = -713.09 |proj g|= 0.43437
## At iterate 31 f = -713.12 |proj g|= 0.44507
## At iterate 32 f = -713.21 |proj g|= 1.7356
## At iterate 33 f = -713.26 |proj g|= 1.3057
## At iterate 34 f = -713.29 |proj g|= 1.4468
## At iterate 35 f = -713.32 |proj g|= 1.4686
## At iterate 36 f = -713.37 |proj g|= 1.4705
## At iterate 37 f = -713.39 |proj g|= 0.37368
## At iterate 38 f = -713.4 |proj g|= 0.2782
## At iterate 39 f = -713.4 |proj g|= 1.0599
## At iterate 40 f = -713.4 |proj g|= 0.19422
## At iterate 41 f = -713.41 |proj g|= 0.19192
## At iterate 42 f = -713.41 |proj g|= 0.46662
## At iterate 43 f = -713.42 |proj g|= 0.58493
## At iterate 44 f = -713.43 |proj g|= 0.45128
## At iterate 45 f = -713.45 |proj g|= 1.6998
## At iterate 46 f = -713.45 |proj g|= 0.58155
## At iterate 47 f = -713.46 |proj g|= 0.54353
## At iterate 48 f = -713.47 |proj g|= 0.2181
## At iterate 49 f = -713.47 |proj g|= 0.23092
## At iterate 50 f = -713.47 |proj g|= 0.39126
## At iterate 51 f = -713.47 |proj g|= 0.54593
## At iterate 52 f = -713.48 |proj g|= 0.43935
## At iterate 53 f = -713.48 |proj g|= 0.68417
## At iterate 54 f = -713.48 |proj g|= 0.15831
## At iterate 55 f = -713.48 |proj g|= 0.11442
## At iterate 56 f = -713.48 |proj g|= 0.16225
## At iterate 57 f = -713.49 |proj g|= 0.16222
## At iterate 58 f = -713.49 |proj g|= 0.093246
## At iterate 59 f = -713.49 |proj g|= 0.16234
## At iterate 60 f = -713.49 |proj g|= 0.11056
## At iterate 61 f = -713.49 |proj g|= 0.13681
## At iterate 62 f = -713.49 |proj g|= 0.086683
## At iterate 63 f = -713.49 |proj g|= 0.071366
## At iterate 64 f = -713.49 |proj g|= 0.073598
## At iterate 65 f = -713.49 |proj g|= 0.37024
## At iterate 66 f = -713.49 |proj g|= 0.37446
## At iterate 67 f = -713.49 |proj g|= 0.25901
## At iterate 68 f = -713.49 |proj g|= 0.43179
## At iterate 69 f = -713.49 |proj g|= 0.16825
## At iterate 70 f = -713.49 |proj g|= 0.16214
## At iterate 71 f = -713.49 |proj g|= 0.16223
## At iterate 72 f = -713.49 |proj g|= 0.097911
## At iterate 73 f = -713.49 |proj g|= 0.06467
## At iterate 74 f = -713.49 |proj g|= 0.1245
## At iterate 75 f = -713.49 |proj g|= 0.075958
## At iterate 76 f = -713.49 |proj g|= 0.0425
## At iterate 77 f = -713.49 |proj g|= 0.041319
## At iterate 78 f = -713.5 |proj g|= 0.10335
## At iterate 79 f = -713.5 |proj g|= 0.092153
## At iterate 80 f = -713.5 |proj g|= 0.36409
## At iterate 81 f = -713.5 |proj g|= 0.11798
## At iterate 82 f = -713.5 |proj g|= 0.034605
## At iterate 83 f = -713.5 |proj g|= 0.021465
## At iterate 84 f = -713.5 |proj g|= 0.024109
## At iterate 85 f = -713.5 |proj g|= 0.0423
## At iterate 86 f = -713.5 |proj g|= 0.046687
## At iterate 87 f = -713.5 |proj g|= 0.024203
## At iterate 88 f = -713.5 |proj g|= 0.16315
## At iterate 89 f = -713.5 |proj g|= 0.10832
## At iterate 90 f = -713.5 |proj g|= 0.073464
## At iterate 91 f = -713.5 |proj g|= 0.10057
## At iterate 92 f = -713.5 |proj g|= 0.10677
## At iterate 93 f = -713.5 |proj g|= 0.10335
## At iterate 94 f = -713.5 |proj g|= 0.2692
## At iterate 95 f = -713.5 |proj g|= 0.18251
## At iterate 96 f = -713.5 |proj g|= 0.16739
## At iterate 97 f = -713.5 |proj g|= 0.25655
## At iterate 98 f = -713.5 |proj g|= 0.28924
## At iterate 99 f = -713.5 |proj g|= 0.39099
## At iterate 100 f = -713.5 |proj g|= 0.18433
## At iterate 101 f = -713.5 |proj g|= 0.078975
## final value -713.501662
## stopped after 101 iterations
# Project the emulated scores back up to the full timeseries.
# NOTE(review): only the first 2 PCs are used here although npc = 3 —
# possibly deliberate (the 3rd emulator may be poor), but confirm.
anom_loo <- pca$rotation[ ,1:2] %*% t(scores_em_mean_train[,1:2])*pca$scale
tens_loo <- t(anom_loo + pca$center)
anom_test <- pca$rotation[ ,1:2] %*% t(scores_em_mean_test[,1:2])*pca$scale
tens_test <- t(anom_test + pca$center)
# Emulated (LOO) vs actual scores for each retained PC
par(mfrow = c(1,3))
for(i in 1:npc){
plot( pca$x[,i], scores_em_mean_train[,i])
abline(0,1)
}
# Leave-one-out reconstruction error on the training set
Y_loo_err <- tens_loo - Y_train
par(mfrow = c(1,2))
matplot(years_trunc, t(Y_train),
type = 'l',
lty = 'solid',
col = 'black',
xlab = 'years',
ylab = 'test',
main = 'Leave-one-out prediction error',
ylim = c(-50,200)
)
matlines(years_trunc, t(tens_loo),
type = 'l',
lty = 'solid',
col = 'red',
)
matplot(years_trunc, t(Y_loo_err),
type = 'l',
lty = 'solid',
col = 'black',
xlab = 'years',
ylab = 'test - predicted output',
main = 'test prediction error',
ylim = c(-100,100)
)
# Same comparison for the held-out test set
Y_test_err <- tens_test - Y_test
par(mfrow = c(1,2))
matplot(years_trunc, t(Y_test),
type = 'l',
lty = 'solid',
col = 'black',
xlab = 'years',
ylab = 'test',
main = 'Y_test',
ylim = c(-50,200)
)
matlines(years_trunc, t(tens_test),
type = 'l',
lty = 'solid',
col = 'red',
)
matplot(years_trunc, t(Y_test_err),
type = 'l',
lty = 'solid',
col = 'black',
xlab = 'years',
ylab = 'test - predicted output',
main = 'test prediction error',
ylim = c(-100,100)
)
## Is this right? How about testing how good emulating the first (or last) point is? Are the “zero carbon cycle” and “failures” causing problems with the emulator?
# Emulate the first timestep directly (a single scalar output), to test
# whether the PCA dimension-reduction step is what causes emulator trouble
y_sp_train <- Y_train[ ,1]
y_sp_test <- Y_test[ ,1]
fit <- km(~., design = X_train, response = y_sp_train)
##
## optimisation start
## ------------------
## * estimation method : MLE
## * optimisation method : BFGS
## * analytical gradient : used
## * trend model : ~alpha_io + a_wl_io + bio_hum_cn + b_wl_io + dcatch_dlai_io +
## dqcrit_io + dz0v_dh_io + f0_io + fd_io + g_area_io + g_root_io +
## g_wood_io + gs_nvg_io + hw_sw_io + kaps_roth + knl_io + lai_max_io +
## lai_min_io + lma_io + n_inorg_turnover + nmass_io + nr_io +
## retran_l_io + retran_r_io + r_grow_io + rootd_ft_io + sigl_io +
## sorp + tleaf_of_io + tlow_io + tupp_io + l_vg_soil
## * covariance model :
## - type : matern5_2
## - nugget : NO
## - parameters lower bounds : 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10
## - parameters upper bounds : 1.998344 2 1.994471 1.999927 2 2 2 1.996699 2 1.996789 2 2 2 2 1.999374 1.99598 1.998257 2 2 1.996693 1.998361 2 2 2 1.991674 2 2 1.986556 2 2 2 1.998477
## - best initial criterion value(s) : -1731.7
##
## N = 32, M = 5 machine precision = 2.22045e-16
## At X0, 0 variables are exactly at the bounds
## At iterate 0 f= 1731.7 |proj g|= 1.8431
## At iterate 1 f = 1725.7 |proj g|= 1.7078
## At iterate 2 f = 1715.7 |proj g|= 1.7668
## At iterate 3 f = 1711.2 |proj g|= 1.7149
## At iterate 4 f = 1699.6 |proj g|= 1.9473
## At iterate 5 f = 1698.2 |proj g|= 1.7839
## At iterate 6 f = 1694.1 |proj g|= 1.8427
## ys=-1.848e-01 -gs= 4.051e+00, BFGS update SKIPPED
## At iterate 7 f = 1676.7 |proj g|= 1.9325
## At iterate 8 f = 1665.8 |proj g|= 1.9131
## At iterate 9 f = 1660 |proj g|= 1.8914
## At iterate 10 f = 1654.5 |proj g|= 1.8321
## At iterate 11 f = 1652.6 |proj g|= 1.8544
## At iterate 12 f = 1651 |proj g|= 1.1765
## At iterate 13 f = 1649.2 |proj g|= 1.1722
## At iterate 14 f = 1648.2 |proj g|= 1.8488
## At iterate 15 f = 1646.9 |proj g|= 1.8481
## At iterate 16 f = 1642.9 |proj g|= 1.0939
## At iterate 17 f = 1639 |proj g|= 1.0351
## At iterate 18 f = 1638.5 |proj g|= 1.8282
## At iterate 19 f = 1637.4 |proj g|= 1.8254
## At iterate 20 f = 1635.6 |proj g|= 1.7282
## At iterate 21 f = 1635.2 |proj g|= 1.5999
## At iterate 22 f = 1633.5 |proj g|= 1.8229
## At iterate 23 f = 1633 |proj g|= 1.8169
## At iterate 24 f = 1631.2 |proj g|= 1.1789
## At iterate 25 f = 1630.6 |proj g|= 1.8223
## At iterate 26 f = 1630.4 |proj g|= 1.8146
## At iterate 27 f = 1630.3 |proj g|= 1.7245
## At iterate 28 f = 1630.2 |proj g|= 1.4229
## At iterate 29 f = 1629.8 |proj g|= 1.8136
## At iterate 30 f = 1629.2 |proj g|= 1.2648
## At iterate 31 f = 1629.1 |proj g|= 1.8165
## At iterate 32 f = 1628.8 |proj g|= 1.8172
## At iterate 33 f = 1628.6 |proj g|= 1.7298
## At iterate 34 f = 1628.5 |proj g|= 1.2649
## At iterate 35 f = 1628.5 |proj g|= 1.2429
## At iterate 36 f = 1628.3 |proj g|= 1.1536
## At iterate 37 f = 1628.1 |proj g|= 1.1453
## At iterate 38 f = 1628.1 |proj g|= 1.8103
## At iterate 39 f = 1628 |proj g|= 1.8124
## At iterate 40 f = 1627.9 |proj g|= 0.60535
## At iterate 41 f = 1627.9 |proj g|= 0.6034
## At iterate 42 f = 1627.9 |proj g|= 0.96437
## At iterate 43 f = 1627.9 |proj g|= 0.34309
## At iterate 44 f = 1627.9 |proj g|= 0.39325
## At iterate 45 f = 1627.9 |proj g|= 0.42079
## At iterate 46 f = 1627.9 |proj g|= 0.23201
## At iterate 47 f = 1627.9 |proj g|= 0.30153
## At iterate 48 f = 1627.9 |proj g|= 0.57704
## At iterate 49 f = 1627.9 |proj g|= 0.26205
## At iterate 50 f = 1627.9 |proj g|= 0.24962
## At iterate 51 f = 1627.9 |proj g|= 0.25086
## At iterate 52 f = 1627.9 |proj g|= 0.39192
## At iterate 53 f = 1627.9 |proj g|= 1.0372
## At iterate 54 f = 1627.9 |proj g|= 0.66916
## At iterate 55 f = 1627.9 |proj g|= 0.37207
## At iterate 56 f = 1627.9 |proj g|= 0.55944
## At iterate 57 f = 1627.9 |proj g|= 0.25217
## At iterate 58 f = 1627.9 |proj g|= 0.28757
## At iterate 59 f = 1627.9 |proj g|= 0.10602
## At iterate 60 f = 1627.9 |proj g|= 0.14743
## At iterate 61 f = 1627.9 |proj g|= 0.051709
## At iterate 62 f = 1627.9 |proj g|= 0.10027
## At iterate 63 f = 1627.9 |proj g|= 0.20563
## At iterate 64 f = 1627.9 |proj g|= 0.11686
## At iterate 65 f = 1627.9 |proj g|= 0.16813
## At iterate 66 f = 1627.9 |proj g|= 0.19414
## At iterate 67 f = 1627.9 |proj g|= 0.093691
## At iterate 68 f = 1627.9 |proj g|= 0.097095
## At iterate 69 f = 1627.9 |proj g|= 0.10811
## At iterate 70 f = 1627.9 |proj g|= 0.057755
## At iterate 71 f = 1627.9 |proj g|= 0.08492
## At iterate 72 f = 1627.9 |proj g|= 0.027138
## At iterate 73 f = 1627.9 |proj g|= 0.018426
##
## iterations 73
## function evaluations 94
## segments explored during Cauchy searches 84
## BFGS updates skipped 1
## active bounds at final generalized Cauchy point 25
## norm of the final projected gradient 0.018426
## final function value 1627.88
##
## F = 1627.88
## final value 1627.877583
## converged
# Predict the single-timestep emulator at the held-out test inputs
pred <- predict.km(fit, newdata = X_test, type = 'UK')
par(mfrow = c(1,2))
# Predicted vs observed, with the 1:1 line, then the residuals
plot(y_sp_test, pred$mean)
abline(0,1)
plot(y_sp_test - pred$mean )
twoStep_glmnet <- function(X, y, nugget=NULL, nuggetEstim=FALSE, noiseVar=NULL, seed=NULL, trace=FALSE, maxit=100,
                           REPORT=10, factr=1e7, pgtol=0.0, parinit=NULL, popsize=100){
  # Two-step emulator fit: (1) a cross-validated lasso (glmnet) selects
  # the active inputs; (2) a DiceKriging km() emulator is fitted with a
  # linear trend over only the retained inputs.
  #
  # Args:
  #   X        design matrix of inputs; columns must be named.
  #   y        response vector, length nrow(X).
  #   nugget, nuggetEstim, noiseVar, parinit   passed through to km().
  #   seed     recorded in the returned list for reproducibility
  #            (not set here).
  #   trace, maxit, REPORT, factr, pgtol, popsize   optimiser controls
  #            passed to km() via its control list.
  # Returns:
  #   A list containing the fitted km emulator ('emulator'), the inputs
  #   retained by the lasso ('meanTerms'), the cv.glmnet fit, the km
  #   range/trend coefficients, and the inputs used to build it.
  control_list <- list(trace=trace, maxit=maxit, REPORT=REPORT, factr=factr,
                       pgtol=pgtol, pop.size=popsize)
  # Fit a lasso by cross validation
  library(glmnet)
  fit_glmnet_cv <- cv.glmnet(x=X, y=y)
  # Indices of the coefficients retained at the 1-se lambda.
  # NB: the @i slot is zero-based and position 0 is the intercept;
  # after dropping the intercept label, labs[coef_i] silently skips a
  # zero index, so the intercept never appears in glmnet_retained.
  coef_i <- (coef(fit_glmnet_cv, s = "lambda.1se"))@i
  labs <- labels(coef(fit_glmnet_cv, s = "lambda.1se"))[[1]]
  labs <- labs[-1] # remove intercept
  glmnet_retained <- labs[coef_i]
  if(length(glmnet_retained) == 0){
    # Lasso retained no inputs: fall back to an intercept-only trend
    # (the original paste() construction would produce the invalid
    # formula "~ " and error here)
    start_form <- as.formula("~ 1")
  } else {
    start_form <- as.formula(paste("~ ", paste(glmnet_retained, collapse= "+")))
  }
  m <- km(start_form, design=X, response=y, nugget=nugget, parinit=parinit,
          nugget.estim=nuggetEstim, noise.var=noiseVar, control=control_list)
  return(list(x=X, y=y, nugget=nugget, nuggetEstim=nuggetEstim,
              noiseVar=noiseVar, emulator=m, seed=seed, coefs=m@covariance@range.val,
              trends=m@trend.coef, meanTerms=all.vars(start_form), fit_glmnet_cv=fit_glmnet_cv))
}
# Fit two-step (lasso + km) emulators to the first three PC scores
twostep_test1 <- twoStep_glmnet(X = X_train, y = pca$x[,1])
## Loading required package: Matrix
## Loaded glmnet 4.1-2
twostep_test2 <- twoStep_glmnet(X = X_train, y = pca$x[,2])
twostep_test3 <- twoStep_glmnet(X = X_train, y = pca$x[,3])
# Leave-one-out predictions of PC1 scores from the two-step emulator
twostep_loo1 <- leaveOneOut.km(model = twostep_test1$emulator, type = 'UK', trend.reestim = TRUE)
# The twostep error is a little lower. Not much though.
# Compare LOO predictions: full-trend km (black) vs two-step (red)
plot(pca$x[,1], scores_em_mean_train[,1])
points(pca$x[,1], twostep_loo1$mean, col = 'red')
abline(0,1)
# Mean absolute LOO error of each approach on PC1
mean(abs(scores_em_mean_train[,1] - pca$x[,1]))
## [1] 3.431702
mean(abs(twostep_loo1$mean - pca$x[,1]))
## [1] 3.371348
anomalizeTSmatrix <- function(x, ix){
  # Convert a timeseries matrix (rows = ensemble members, columns =
  # timesteps) to anomalies: subtract from each row its mean over the
  # reference columns ix.
  #
  # Args:
  #   x:  numeric matrix.
  #   ix: column indices defining the reference (baseline) period.
  # Returns:
  #   A matrix the same shape as x, each row centred on its baseline mean.
  #
  # rowMeans() replaces apply(subx, 1, mean): same result, faster, and
  # drop = FALSE keeps a one-column subset as a matrix (the original
  # errored when ix had length 1 because x[, ix] dropped to a vector).
  baseline <- rowMeans(x[ , ix, drop = FALSE])
  sweep(x, 1, baseline, FUN = '-')
}
# Anomalise the ensemble about each member's mean over timesteps 90:111
npp_anom_ens_wave00 <- anomalizeTSmatrix(npp_ens_wave00, 90:111)
Y_anom <- npp_anom_ens_wave00
# Same train/test split and year window as before, on the anomalies
Y_train_anom <- Y_anom[train_ix, years_ix]
Y_test_anom <- Y_anom[test_ix, years_ix]
# PCA of the anomaly training data
pca_anom <- prcomp(Y_train_anom, center = TRUE, scale = TRUE)
par(mfrow = c(1,2))
matplot(years_trunc, t(Y_train_anom),
type = 'l',
lty = 'solid',
col = 'black',
xlab = 'years',
ylab = 'test',
main = 'Leave-one-out prediction error',
ylim = c(-10,50)
)
plot(pca_anom)
# How many principal components do we wish to keep?
npc <- 4
scores_train_anom <- pca_anom$x[ ,1:npc]
# project the truncated scores back up, to see how well they reproduce the
# original data
#anom <- pca$rotation[ ,1:npc] %*% t(scores)*pca$scale
#tens <- t(anom + pca$center)
library(parallel)
# Fit km() to each PC score in parallel (4 cores)
scores_train_anom_list <- mat2list(scores_train_anom)
fit_list_scores_train_anom <- mclapply(X = scores_train_anom_list, FUN = km, formula = ~., design = X_train,
mc.cores = 4, control = list(trace = FALSE))
# Leave-one-out predictions of each PC score
scores_train_anom_em_loo <- matrix(ncol = ncol(scores_train_anom), nrow = nrow(scores_train_anom))
for(i in 1:ncol(scores_train_anom_em_loo)){
pred <- leaveOneOut.km(model = fit_list_scores_train_anom[[i]], type = 'UK', trend.reestim = TRUE)
scores_train_anom_em_loo[ ,i] <- pred$mean
}
# Emulated (LOO) vs actual scores for each retained PC
par(mfrow = c(2,2))
for(i in 1:npc){
plot(scores_train_anom[,i], scores_train_anom_em_loo[,i])
abline(0,1)
}
# Project the LOO-emulated scores back to timeseries space
anom_anom_loo <- pca_anom$rotation[ ,1:npc] %*% t(scores_train_anom_em_loo[,1:npc])*pca_anom$scale
anom_tens_loo <- t(anom_anom_loo + pca_anom$center)
#anom_test <- pca$rotation[ ,1:2] %*% t(scores_em_mean_test[,1:2])*pca$scale
#tens_test <- t(anom_test + pca$center)
Y_anom_loo_err <- anom_tens_loo - Y_train_anom
par(mfrow = c(1,2))
matplot(years_trunc, t(Y_train_anom),
type = 'l',
lty = 'solid',
col = 'black',
xlab = 'years',
ylab = 'test',
main = 'Anomaly ensemble',
ylim = c(-10,30)
)
matlines(years_trunc, t(anom_tens_loo),
type = 'l',
lty = 'solid',
col = 'red',
)
matplot(years_trunc, t(Y_anom_loo_err),
type = 'l',
lty = 'solid',
col = 'black',
xlab = 'years',
ylab = 'test - predicted output',
main = 'test prediction error',
ylim = c(-15,15)
)
# seeing sparkline pairs of ensemble members would be good
# Sparkline grid: each training member (black) with its LOO prediction (red)
par(mfrow = c(20, 20), mar = c(0.05, 0.05, 0.05, 0.05))
for(i in 1:398){
plot(Y_train_anom[i, ], axes = FALSE, type = 'l', lty = 'solid', ylim = c(-10,30))
lines(anom_tens_loo[i,], col = 'red')
}
# Predict the PC scores at the test inputs
scores_test_anom_em <- NULL
for(i in 1:npc){
pred <- predict.km(fit_list_scores_train_anom[[i]], newdata = X_test, type = 'UK')
scores_test_anom_em <- cbind(scores_test_anom_em, pred$mean)
}
#anom_loo <- pca$rotation[ ,1:2] %*% t(scores_em_mean_train[,1:2])*pca$scale
#tens_loo <- t(anom_loo + pca$center)
# NOTE(review): only the first 2 of npc = 4 PCs are used in this test-set
# reconstruction — confirm whether that truncation is intentional
anom_anom <- pca_anom$rotation[ ,1:2] %*% t(scores_test_anom_em[,1:2])*pca_anom$scale
tens_anom_test <- t(anom_anom + pca_anom$center)
# seeing sparkline pairs of ensemble members would be good
# Sparkline grid for the 100 test members: actual (black) vs emulated (red)
par(mfrow = c(10, 10), mar = c(0.1, 0.1, 0.1, 0.1))
for(i in 1:100){
plot(Y_test_anom[i, ], axes = FALSE, type = 'l', lty = 'solid', ylim = c(-10,30))
lines(tens_anom_test[i,], col = 'red')
}
# Key should be that it gives you back the training fits, which you can then use for newdata etc.
pca_km <- function(X, Y, train_ix, test_ix, npc, scale = FALSE, center = TRUE, ...){
# emulate high dimensional output
require(parallel)
# Split into training and test samples
X_train <- X[train_ix, ]
X_test <- X[test_ix, ]
Y_train <- Y[train_ix, ]
Y_test <- Y[test_ix, ]
#reduce dimension of the output
pca <- prcomp(Y_train, scale = scale, center = center)
scores_train_list <- mat2list(pca$x[, 1:npc])
# fitting the emulator is a slow step, so we use parallel computation
# Fit an emulator to each principal component in turn
fit_list <- mclapply(X = scores_train_list, FUN = km, formula = ~., design = X_train,
mc.cores = 4, control = list(trace = FALSE, maxit = 200))
scores_em_test_mean <- NULL
scores_em_test_sd <- NULL
scores_em_train_mean <- NULL
scores_em_train_sd <- NULL
for(i in 1:npc){
loo <- leaveOneOut.km(fit_list[[i]], type = 'UK', trend.reestim = TRUE)
pred_test <- predict(fit_list[[i]], newdata = X_test, type = 'UK')
# Predict training data (low dimension representation)
scores_em_train_mean <- cbind(scores_em_train_mean, loo$mean)
scores_em_train_sd <- cbind(scores_em_train_sd, loo$sd)
# Predict test data (low dimension representation)
scores_em_test_mean <- cbind(scores_em_test_mean, pred_test$mean)
scores_em_test_sd <- cbind(scores_em_test_sd, pred_test$sd)
}
# Project back up to high dimension
if(scale){
anom_train <- pca$rotation[ ,1:npc] %*% t(scores_em_train_mean[,1:npc])*pca$scale
anom_test <- pca$rotation[ ,1:npc] %*% t(scores_em_test_mean[,1:npc])*pca$scale
sd_train <- pca$rotation[ ,1:npc] %*% t(scores_em_train_sd[,1:npc])*pca$scale
sd_test <- pca$rotation[ ,1:npc] %*% t(scores_em_test_sd[,1:npc])*pca$scale
}
else{
anom_train <- pca$rotation[ ,1:npc] %*% t(scores_em_train_mean[,1:npc])
anom_test <- pca$rotation[ ,1:npc] %*% t(scores_em_test_mean[,1:npc])
sd_train <- pca$rotation[ ,1:npc] %*% t(scores_em_train_sd[,1:npc])
sd_test <- pca$rotation[ ,1:npc] %*% t(scores_em_test_sd[,1:npc])
}
Ypred_train <- t(anom_train + pca$center)
Ypred_test <- t(anom_test + pca$center)
Yerr_train <- Y_train - Ypred_train
Yerr_test <- Y_test - Ypred_test
return(list(train_ix = train_ix,
test_ix = test_ix,
X_train = X_train,
X_test = X_test,
pca = pca,
fit_list = fit_list,
scores_em_train_mean = scores_em_train_mean,
scores_em_train_sd = scores_em_train_sd,
scores_em_test_mean = scores_em_test_mean,
scores_em_test_sd = scores_em_test_sd,
Ypred_train = Ypred_train,
Ypred_test = Ypred_test
))
}
# How about we look at post 1950? Maybe that is messing it up?
#years_ix <- 101:164
#train_ix <- (1:399)[-288] # Number 288 has numerical problems for NPP
#test_ix <- 400:499
# Drop member 288 (numerical problems for NPP), then split into train/test
X_trunc <- X[-288, ]
Y_trunc <- npp_ens_wave00[-288, years_ix ]
# Use <- rather than = for top-level assignment
train_ix <- 1:398
test_ix <- 399:498
# Emulate NPP with 3 principal components, unscaled PCA
pc_km_npp <- pca_km(X = X_trunc,
                    Y_trunc,
                    train_ix = train_ix, test_ix = test_ix,
                    npc = 3,
                    scale = FALSE,
                    center = TRUE)
# Leave-one-out emulated PC1 scores against the true PC1 scores; points on
# the 1:1 line indicate a well-calibrated emulator mean
plot(pc_km_npp$pca$x[,1], pc_km_npp$scores_em_train_mean[,1] )
abline(0,1)
# Sparkline pairs: one tiny panel per test member (black) with its emulator
# prediction (red). seq_along(test_ix) keeps the loop in sync with the test
# split instead of hard-coding 1:100, matching the later sparkline loops.
par(mfrow = c(10, 10), mar = c(0.1, 0.1, 0.1, 0.1))
for(i in seq_along(test_ix)){
  plot(years_trunc, (Y_trunc[test_ix, ])[i, ], ylim = c(-50, 200), type = 'l', axes = FALSE)
  lines(years_trunc, pc_km_npp$Ypred_test[i, ], col = 'red')
}
# NPP anomaly: each member's series relative to its own mean over columns
# 90:111 (labelled as the 1940-1959 mean in the plots below)
npp_anom_ens_wave00 <- anomalizeTSmatrix(npp_ens_wave00, 90:111)
# How about we look at post 1950? Maybe that is messing it up?
#years_ix <- 101:164
#train_ix <- (1:399)[-288] # Number 288 has numerical problems for NPP
#test_ix <- 400:499
X_trunc <- X[-288, ]
Y_trunc <- npp_anom_ens_wave00[-288, years_ix ]
# Use <- rather than = for top-level assignment
train_ix <- 1:398
test_ix <- 399:498
# Emulate the anomaly with 5 principal components, scaled PCA
pc_km_npp_anom <- pca_km(X = X_trunc,
                         Y_trunc,
                         train_ix = train_ix,
                         test_ix = test_ix,
                         npc = 5,
                         scale = TRUE,
                         center = TRUE)
# Sparkline pairs: test members (black) vs emulated predictions (red).
# seq_along(test_ix) keeps the loop bound tied to the test split instead of
# a hard-coded 1:100, consistent with the later sparkline loops.
par(mfrow = c(10, 10), mar = c(0.05, 0.05, 0.05, 0.05), oma = c(3,0.1,3,0.1))
for(i in seq_along(test_ix)){
  plot(years_trunc, (Y_trunc[test_ix, ])[i, ], ylim = c(-5, 20), type = 'l', axes = FALSE)
  lines(years_trunc, pc_km_npp_anom$Ypred_test[i, ], col = 'red')
}
# Reset graphics state so the title and legend annotate the whole grid
reset()
mtext(text = 'NPP anomaly test set predictions', side = 3, cex = 1.5, line = -2, outer = TRUE)
legend('bottom', col = c('black', 'red'), legend = c('observed', 'predicted'), horiz = TRUE, lty = 'solid')
# Train/test overview for the NPP anomaly emulator: observed ensemble in
# black, emulator predictions in red.
par(mfrow = c(2,1))
# Top panel: training members vs their leave-one-out predictions
matplot(years_trunc, t(Y_trunc[train_ix, ]), type = 'l', lty = 'solid', col = 'black',
main = "Train",
ylab = 'NPP anomaly from 1940 - 1959 mean')
matlines(years_trunc, t(pc_km_npp_anom$Ypred_train), lty = 'solid', col = 'red')
legend('topleft', col = c('black', 'red'), legend = c('Observed', 'Predicted'), lty = 'solid')
# Bottom panel: held-out test members vs their out-of-sample predictions
matplot(years_trunc, t(Y_trunc[test_ix, ]), type = 'l', lty = 'solid', col = 'black',
main = 'Test',
ylab = 'NPP anomaly from 1940 - 1959 mean')
matlines(years_trunc, t(pc_km_npp_anom$Ypred_test), lty = 'solid', col = 'red')
legend('topleft', col = c('black', 'red'), legend = c('Observed', 'Predicted'), lty = 'solid')
McNeall et al. (2023) found that the carbon cycle tends to collapse when f0 > 0.9 or b_wl < 0.15. If those thresholds are right, the predictions here look pretty good.
# Excise ensemble members we know to cause problems
keep_ix <- which(X[, 'f0_io'] < 0.9 & X[, 'b_wl_io'] > 0.15 )
# Also drop member 288. setdiff() is safe if 288 was already removed by the
# filter above; keep_ix[-which(keep_ix == 288)] would empty the whole vector
# in that case (negative indexing with integer(0) selects nothing).
keep_ix <- setdiff(keep_ix, 288)
X_trunc <- X[keep_ix, ]
Y_trunc <- npp_anom_ens_wave00[keep_ix, years_ix ]
# Use <- rather than = for top-level assignment
train_ix <- 1:300
test_ix <- 301:374
# Re-fit the anomaly emulator on the filtered ensemble
pc_km_npp_anom <- pca_km(X = X_trunc,
                         Y_trunc,
                         train_ix = train_ix,
                         test_ix = test_ix,
                         npc = 5,
                         scale = TRUE,
                         center = TRUE)
# Train/test overview for the anomaly emulator fitted on the filtered
# ensemble: observed in black, predicted in red.
par(mfrow = c(2,1))
# Top panel: training members vs leave-one-out predictions
matplot(years_trunc, t(Y_trunc[train_ix, ]), type = 'l', lty = 'solid', col = 'black',
main = "Train",
ylab = 'NPP anomaly from 1940 - 1959 mean')
matlines(years_trunc, t(pc_km_npp_anom$Ypred_train), lty = 'solid', col = 'red')
legend('topleft', col = c('black', 'red'), legend = c('Observed', 'Predicted'), lty = 'solid')
# Bottom panel: test members vs out-of-sample predictions
matplot(years_trunc, t(Y_trunc[test_ix, ]), type = 'l', lty = 'solid', col = 'black',
main = 'Test',
ylab = 'NPP anomaly from 1940 - 1959 mean')
matlines(years_trunc, t(pc_km_npp_anom$Ypred_test), lty = 'solid', col = 'red')
legend('topleft', col = c('black', 'red'), legend = c('Observed', 'Predicted'), lty = 'solid')
# Sparkline pairs: one panel per test member (black) with its emulator
# prediction (red). seq_along(test_ix) replaces 1:length(test_ix), which
# misbehaves (yields c(1, 0)) if the test split is ever empty.
par(mfrow = c(10, 10), mar = c(0.05, 0.05, 0.05, 0.05), oma = c(3,0.1,3,0.1))
for(i in seq_along(test_ix)){
  plot(years_trunc, (Y_trunc[test_ix, ])[i, ], ylim = c(-5, 20), type = 'l', axes = FALSE)
  lines(years_trunc, pc_km_npp_anom$Ypred_test[i, ], col = 'red')
}
# Reset graphics state so the title and legend annotate the whole grid
reset()
mtext(text = 'NPP anomaly test set predictions', side = 3, cex = 1.5, line = -2, outer = TRUE)
legend('bottom', col = c('black', 'red'), legend = c('observed', 'predicted'), horiz = TRUE, lty = 'solid')
# Excise ensemble members we know to cause problems
keep_ix <- which(X[, 'f0_io'] < 0.9 & X[, 'b_wl_io'] > 0.15 )
# Also drop member 288. setdiff() is safe if 288 was already removed by the
# filter above; keep_ix[-which(keep_ix == 288)] would empty the whole vector
# in that case (negative indexing with integer(0) selects nothing).
keep_ix <- setdiff(keep_ix, 288)
X_trunc <- X[keep_ix, ]
Y_trunc <- npp_ens_wave00[keep_ix, years_ix ]
# Use <- rather than = for top-level assignment
train_ix <- 1:300
test_ix <- 301:374
# Emulate raw (un-anomalized) NPP on the filtered ensemble
pc_km_npp <- pca_km(X = X_trunc,
                    Y_trunc,
                    train_ix = train_ix,
                    test_ix = test_ix,
                    npc = 3,
                    scale = TRUE,
                    center = TRUE)
# Train/test overview for the raw NPP emulator on the filtered ensemble:
# observed in black, predicted in red.
# NOTE(review): Y_trunc here is raw NPP (L913), so the y-labels mentioning a
# '1940 - 1959 mean' look carried over from the anomaly plots — confirm.
par(mfrow = c(2,1))
# Top panel: training members vs leave-one-out predictions
matplot(years_trunc, t(Y_trunc[train_ix, ]), type = 'l', lty = 'solid', col = 'black',
main = "Train",
ylab = 'NPP from 1940 - 1959 mean')
matlines(years_trunc, t(pc_km_npp$Ypred_train), lty = 'solid', col = 'red')
legend('topleft', col = c('black', 'red'), legend = c('Observed', 'Predicted'), lty = 'solid')
# Bottom panel: test members vs out-of-sample predictions
matplot(years_trunc, t(Y_trunc[test_ix, ]), type = 'l', lty = 'solid', col = 'black',
main = 'Test',
ylab = 'NPP from 1940 - 1959 mean')
matlines(years_trunc, t(pc_km_npp$Ypred_test), lty = 'solid', col = 'red')
legend('topleft', col = c('black', 'red'), legend = c('Observed', 'Predicted'), lty = 'solid')
# Sparkline pairs: one panel per test member (black) with its emulator
# prediction (red). seq_along(test_ix) replaces 1:length(test_ix), which
# misbehaves (yields c(1, 0)) if the test split is ever empty.
par(mfrow = c(10, 10), mar = c(0,0,0,0), oma = c(3,0.1,3,0.1))
for(i in seq_along(test_ix)){
  plot(years_trunc, (Y_trunc[test_ix, ])[i, ], ylim = c(0, 120), type = 'l', xaxt = 'n', yaxt = 'n')
  lines(years_trunc, pc_km_npp$Ypred_test[i, ], col = 'red')
}
# Reset graphics state so the title and legend annotate the whole grid
reset()
mtext(text = 'NPP test set predictions', side = 3, cex = 1.5, line = -2, outer = TRUE)
legend('bottom', col = c('black', 'red'), legend = c('observed', 'predicted'), horiz = TRUE, lty = 'solid')
# Observed vs predicted scatter over all test members and years;
# points on the 1:1 line indicate an unbiased emulator
plot(Y_trunc[test_ix, ], pc_km_npp$Ypred_test, xlim = c(0,120), ylim = c(0,120))
abline(0,1)
# Cumulative NBP: running sum along time (columns) for each ensemble member.
# apply() over rows returns a time-by-member matrix, so transpose back to
# member-by-time orientation.
cnbp_ens_wave00 <- t(apply(nbp_ens_wave00, 1, FUN = cumsum))
# Plot all members (excluding the problematic member 288) in black
matplot(years, t(cnbp_ens_wave00[-288,]), col = 'black', type = 'l', lty = 'solid')